/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
package com.bigdata.rdf.lexicon;

import com.bigdata.btree.BTree;
import com.bigdata.btree.BTree.PartitionedCounter;

/**
 * An encoder/decoder for long values formed from a partition identifier in the
 * high word and a local counter in the low word where the low N bits of the
 * long value are reversed and rotated into the high N bits of the long value.
 * <p>
 * The purpose of this encoding is to cause the N high bits to vary rapidly as
 * the local counter is driven up by writes on the index partition. This has the
 * effect of scattering writes on dependent indices (those using the resulting
 * long value as the sole or leading component of their key).
 * <p>
 * Given a source RDF/XML document with M "terms" distributed uniformly over K
 * TERM2ID index partitions, each term has a uniform likelihood of setting any
 * of the low bits of the local counter. After encoding, this means that the N
 * high-bits of encoded term identifier are uniformly distributed. Assuming that
 * the separator keys for the ID2TERM index divide the key space into equally
 * sized key-ranges, then the reads and writes on the ID2TERM index partitions
 * will be uniformly distributed as well.
 * <p>
 * The next bits in the encoded values are derived from the partition identifier
 * followed by the term identifier and therefore have a strong bias for the
 * index partition and the sequential assignment of local counter values within
 * an index partition respectively. This means that read / write access within
 * an index partition tends to have some locality, which improves B+Tree
 * performance through several mechanisms (mainly improved cache effects,
 * reduced copy-on-write for dirty leaves and nodes, and less IO costs).
 * <p>
 * When the #of ID2TERM index partitions GT <code>2^N</code>, only a subset of
 * those index partitions can be directly selected by the N high bits with their
 * uniform distribution. The next bias is the partition identifier, which begins
 * at ZERO (0), is inflated to (0, [1:P]), where P is the #of index partitions
 * generated by a scatter split, and grows relatively slowly thereafter as index
 * partitions are fill up and are split or are moved to redistribute the load on
 * the cluster.
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 * @version $Id$
 */
public class TermIdEncoder {

    /**
     * The #of low bits from the local counter that will be reversed and
     * written into the high-bits of the encoded long value.
     */
    private final int N;

    /**
     * A mask with the {@link #N} low bits turned on. 
     */
    private final int mask;

    public String toString() {

        return getClass().getName() + "{N=" + N + ", mask="
                + Integer.toBinaryString(mask) + "}";

    }

    /**
     * Return the #of low bits from the local counter that will be reversed and
     * written into the high-bits of the encoded long value.
     */
    public int getNBits() {
        
        return N;
        
    }
    
    /**
     * @param N
     *            The #of low bits from the local counter that will be reversed
     *            and written into the high-bits of the encoded long value.
     */
    public TermIdEncoder(final int N) {

        if (N < 0)
            throw new IllegalArgumentException();

        if (N > 31)
            throw new IllegalArgumentException();

        this.N = N;

        /*
         * Construct the bit mask - this will have zeros in the high bits
         * that correspond to the bits of the localCounter that WILL NOT be
         * reversed and ones in the low bits that correspond to bits of the
         * localCounter that WILL be reversed.
         */
        {

            int mask = 0;

            int bit;

            for (int i = 0; i < N; i++) {

                bit = (1 << i);

                mask |= bit;

            }

            this.mask = mask;

        }

    }

    /**
     * Encode a term identifier using the configured value of
     * {@link #getNBits() NBits}.
     * 
     * @param v
     *            A 64-bit long counter value as generated by an
     *            {@link BTree.PartitionedCounter}.
     * 
     * @return A permutation of that long value in which the low <i>N</i> bits
     *         have been reversed and rotated into the high <i>N</i> bits.
     */
    public long encode(final long v) {
      
        if (v == 0L) {
            // 0L is reserved for NULLs.
            throw new IllegalArgumentException();
        }

        // the partition identifier.
        final long pid = 0xFFFFFFFFL & getPartitionId(v);

        // the local counter.
        final long ctr = 0xFFFFFFFFL & getLocalCounter(v);

        // the output value.
        long u = 0L;

        /*
         * Move pid to high word. 
         */
        u |= pid << (32 - N);

        /*
         * Right shift the counter over the bits that are being reversed,
         * extend to a long value.
         */
        u |= ctr >>> N;

        /*
         * Use the mask to isolate the low-N bits of the counter, which are
         * then reversed into the high-N bits.
         */
        final long rev = Integer.reverse(((int) ctr) & mask);

        /*
         * Overwrite the high N bits of the long value using the reversed
         * low N bits from the local counter.
         */
        u |= rev << 32;

        return u;

    }

    /**
     * Reverses the effect of {@link #encode(long)}.
     * 
     * @param u
     *            An encoded long value.
     * 
     * @return The decode long value.
     */
    public long decode(final long u) {

        // reverse high word and mask to recover the low-N bits.
        final int fwd = Integer.reverse(((int) (u >>> 32))) & mask;

        /*
         * Left-shift to make room for the (un-)reversed bits and then combine
         * them back in.
         */
        final int ctr = ((int) (u << N) | fwd);

        /*
         * Bring the partition identifier back to an int by shifting it the
         * same number of bits in the other direction. 
         */
        final int pid = ((int) (u >>> (32 - N)));

        // reconstruct the long counter value.
        return combine(pid, ctr);

    }

    /**
     * Return the partition identifier from the high word of a partitioned
     * counter.
     * 
     * @param v
     *            The partitioned counter.
     * 
     * @return The high word.
     */
    public static int getPartitionId(final long v) {

        return BTree.PartitionedCounter.getPartitionId(v);
//        return (int) (v >>> 32);

    }

    /**
     * Return the local counter from the low word of a partitioned counter.
     * 
     * @param v
     *            The partitioned counter.
     * 
     * @return The low word.
     */
    public static int getLocalCounter(final long v) {

//        return (int) v;
        return BTree.PartitionedCounter.getLocalCounter(v);

    }

    /**
     * Combines the partition identifier and the local counter using the same
     * logic as the {@link PartitionedCounter}.
     * 
     * @param pid
     *            The partition identifier.
     * @param ctr
     *            The local counter.
     * 
     * @return The long counter assembled from those values.
     * 
     * @see BTree.PartitionedCounter, which performs the same operation and MUST
     *      be consistent with this method.
     */
    public static long combine(final int pid, final int ctr) {

//        return ((long) pid) << 32 | (0xFFFFFFFFL & (long) ctr);
        return BTree.PartitionedCounter.combine(pid, ctr);

    }

    /*
     * Alternative versions used for debugging.  Package private for the unit
     * tests.
     */

    long encode2(final long v1) {
        long v2 = v1 >>> N;
        
        for (int b = 0; b < N; b++) {
            if ((v1 & (1L << b)) != 0) {
                final long sv = 1L << (63 - b);
                v2 |= sv;
            }
        }
        
        return v2;
    }

    long decode2(final long v2) {
        long v1 = v2 << N;
        
        for (int b = 0; b < N; b++) {
            if ((v2 & (1L << (63 - b))) != 0) {
                v1 |= 1L << b;
            }
        }
        
        return v1;
    }

}
